In the present analysis we study the interest of the AI community in a set of AI benchmarks. We focus on “interest” rather than “progress” because interest is something we can estimate using proxies. In this particular case, we use the (normalised) number of hits (i.e., documents) obtained from AItopics per benchmark over the last decade (2008-2019). Note that the results for 2019 are incomplete.

The benchmarks in the present analysis rely on our own previous analysis and annotation of papers, as well as on open resources such as Papers With Code, including data from several repositories (e.g., EFF, NLP-progress, SQuAD, RedditSota, etc.).

# Load the cleaned per-benchmark interest table from its serialized RDS file.
data <- readRDS("interest_benchmarks_clean.rds")

#' Add a mean-interest column averaged over a set of year columns.
#'
#' @param data Data frame holding one numeric column per year, named after
#'   the year (e.g. "2008").
#' @param years Years averaged when `interestYears` is "All".
#' @param interestYears Either the string "All" (use `years`) or a vector of
#'   years to average instead.
#' @return `data` with an additional `mean.Interest` column. Exact zeros are
#'   replaced by 1e-13 so the column is strictly positive (the report later
#'   takes logs of it).
interestPerYears <- function(data, years = 2008:2019, interestYears = "All") {

  # Nothing below is random; the call is kept so the global RNG state after
  # this function is the same as in earlier versions of the script.
  set.seed(288)

  # identical() always yields a single TRUE/FALSE. The previous
  # `interestYears == "All"` produced a vector condition whenever a range of
  # years was passed, which warns on old R versions and errors on R >= 4.2.
  selected <- if (identical(interestYears, "All")) years else interestYears
  year_cols <- as.character(selected)

  # Base subsetting on a plain data.frame view: no dplyr needed here.
  yearly <- as.data.frame(data)[year_cols]
  data$mean.Interest <- rowSums(yearly) / length(year_cols)

  # NA-safe mask: a missing mean must not break the replacement of zeros.
  is_zero <- !is.na(data$mean.Interest) & data$mean.Interest == 0
  data$mean.Interest[is_zero] <- 0.0000000000001

  data
}

#' Build the benchmark x ability matrices used by the network plots.
#'
#' @param data Data frame with a `keyword` column (benchmark names), a
#'   `mean.Interest` column (as added by `interestPerYears`) and one column
#'   per cognitive ability listed in `cogAbs`.
#' @param norm Currently unused; kept for backward compatibility (the
#'   normalised variant of this function was disabled).
#' @param years Currently unused; kept for backward compatibility.
#' @param cogAbs Names of the ability columns to keep, in output order.
#' @return An unnamed list of three elements:
#'   1. the ability columns multiplied row-wise by `mean.Interest`,
#'   2. the raw ability columns,
#'   3. the `mean.Interest` vector.
#'   Both data frames use the benchmark keywords as row names.
prepareVis <- function(data, norm = TRUE, years = 2008:2019,
                       cogAbs = c("MP", "SI", "VP", "AP", "AS", "PA", "CE",
                                  "CO", "EC", "NV", "CL", "QL", "MS", "MC")) {

  # Nothing below is random; kept so the global RNG state after the call is
  # the same as in earlier versions of the script.
  set.seed(288)

  missing_required <- setdiff(c("keyword", "mean.Interest"), names(data))
  if (length(missing_required) > 0) {
    stop("`data` is missing required column(s): ",
         paste(missing_required, collapse = ", "), call. = FALSE)
  }

  unknown <- setdiff(cogAbs, names(data))
  if (length(unknown) > 0) {
    warning("Unknown ability column(s) ignored: ",
            paste(unknown, collapse = ", "), call. = FALSE)
  }
  abilities <- unique(cogAbs[cogAbs %in% names(data)])

  # Select the ability columns by name. The earlier positional drop of "the
  # first two columns" silently removed an ability column whenever `keyword`
  # or `category` was absent from `data`.
  interest <- as.data.frame(data)[abilities]
  rownames(interest) <- data$keyword

  interest.mean <- data$mean.Interest

  # Data frame * vector recycles down each column, i.e. every row (benchmark)
  # is scaled by its own mean interest.
  interest.pond <- interest * interest.mean

  list(interest.pond, interest, interest.mean)
}

#' Draw the interactive benchmark / cognitive-ability network.
#'
#' @param data Benchmark x ability incidence matrix (rows = benchmarks,
#'   columns = abilities); the non-zero entries become weighted edges.
#' @param categories Category label of each benchmark, in row order of `data`.
#' @param norm Currently unused; kept for backward compatibility.
#' @return A visNetwork widget.
plotVis <- function(data, categories, norm = TRUE) {

  set.seed(288)

  # Palette indexed by the position of each category among the sorted
  # category levels. "CogAb" sorts first among the categories used in this
  # report (all others start with "Com" or later), so ability nodes receive
  # the first entry. That entry was misspelt "blalck", which is not a valid
  # colour name.
  colours <- c("1" = "black", "2" = "#543005", "3" = "#8c510a",
               "4" = "#bf812d", "5" = "#dfc27d", "6" = "#f6e8c3",
               "7" = "#f5f5f5", "8" = "#c7eae5", "9" = "#80cdc1",
               "10" = "#35978f", "11" = "#01665e", "12" = "#003c30",
               "13" = "#FAFAFA")

  # Ability nodes are the columns of the incidence matrix; derive their
  # number from the data instead of hard-coding 14.
  n_abilities <- ncol(data)

  vis <- toVisNetworkData(
    graph_from_incidence_matrix(data, directed = FALSE, weighted = TRUE)
  )
  n_benchmarks <- nrow(vis$nodes) - n_abilities

  # Row (benchmark) nodes come first, column (ability) nodes last.
  # Benchmarks get a fixed size; abilities are sized by their column total.
  vis$nodes$value <- c(rep(10, n_benchmarks), colSums(data) * 10000)
  vis$nodes$title <- vis$nodes$label
  vis$nodes$category <- c(categories, rep("CogAb", n_abilities))
  vis$nodes$group <- vis$nodes$category
  vis$nodes$color <- colours[as.numeric(as.factor(vis$nodes$category))]

  # Edge value: log of the range-normalised weight; the small offsets keep
  # the logarithm finite for the smallest weight.
  vis$edges$value <- log(
    normalize(vis$edges$weight + 0.00001, method = "range", range = c(0, 1)) +
      0.00001
  )

  v <- visNetwork(vis$nodes, vis$edges, height = "1000px", width = "100%") %>%
    visEdges(
      arrows = "to",
      color = list(color = 'rgba(70,130,180,0.3)', highlight = "#4682B4")
    ) %>%
    visIgraphLayout(
      physics = FALSE,
      randomSeed = 2017,
      layout = "layout_with_fr"
    ) %>%
    visInteraction(navigationButtons = TRUE) %>%
    visOptions(selectedBy = "group", highlightNearest = TRUE)

  return(v)
}

# Build one long-format table of interest-weighted ability values covering
# every period in `periods`.
#
# data:    benchmark table accepted by interestPerYears().
# periods: list (or vector) whose elements are each a set of years forming
#          one period.
#
# Returns the melt()-ed weighted matrices of all periods stacked by rows,
# with `benchmark` and `period` as id columns.
plotProgressPeriods <- function(data, periods) {

  melt_period <- function(p) {
    weighted <- prepareVis(interestPerYears(data, interestYears = p))[[1]]
    # Label the period by joining its years with "-".
    weighted$period <- paste0(as.character(p), collapse = "-")
    weighted$benchmark <- rownames(weighted)
    melt(weighted, id.vars = c("benchmark", "period"))
  }

  per_period <- lapply(periods, melt_period)

  # The leading empty data frame keeps the result a data frame even when
  # `periods` is empty.
  stacked <- do.call(rbind, c(list(data.frame()), per_period))
  return(stacked)
}

Mean Interest per AI benchmark

In these two plots we show how the mean interest per AI benchmark has varied across different periods.

# Mean interest per benchmark on the original scale, one point per
# (benchmark, variable) pair. Benchmarks are ordered by value and the axes
# are flipped so the benchmark names read horizontally.
a <- ggplot(
  df.interest.m,
  aes(x = reorder(Benchmark, value), y = value, colour = variable)
) +
  geom_point(alpha = 1/3, size = 2.5) +
  xlab("") +
  ylab("Mean Interest") +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "bottom")

# Same plot with the values on a log scale.
b <- ggplot(
  df.interest.m,
  aes(x = reorder(Benchmark, value), y = log(value), colour = variable)
) +
  geom_point(alpha = 1/3, size = 2.5) +
  xlab("") +
  ylab("Log(Mean Interest)") +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "bottom")

a
b

Mapping between AI benchmarks and Cognitive Abilities

Graphical representation

  • Benchmarks are grouped and coloured by area (groups from https://paperswithcode.com/)
  • Cognitive abilities are coloured in black and their size represents their relevance (total sum in the mapping) weighted by interest (previous plot).
  • Edges represent that an ability is assigned to a task.
  • The width of an edge represents the “interest” in the benchmark: the wider the edge, the more interest from the community during the last decade (mean).

Note that we can perform exactly the same analysis for different (ranges of) years: the resulting graph has the same structure, but the width of the edges may vary (slightly).

(Networks are interactive!)

Period 2008-2010

# Network for this period. dataA is built outside this excerpt (presumably
# `data` with mean.Interest computed for 2008-2010 - confirm).
plotVis(data = prepareVis(data = dataA)[[1]], categories = data$category)

Period 2011-2013

# Network for this period. dataB is built outside this excerpt (presumably
# `data` with mean.Interest computed for 2011-2013 - confirm).
plotVis(data = prepareVis(data = dataB)[[1]], categories = data$category)

Period 2014-2016

# Network for this period. dataC is built outside this excerpt (presumably
# `data` with mean.Interest computed for 2014-2016 - confirm).
plotVis(data = prepareVis(data = dataC)[[1]], categories = data$category)

Period 2017-2019

# Network for this period. dataD is built outside this excerpt (presumably
# `data` with mean.Interest computed for 2017-2019 - confirm).
plotVis(data = prepareVis(data = dataD)[[1]], categories = data$category)

Relevance of the cognitive abilities in different periods

# Four consecutive three-year periods covering 2008-2019.
periods <- list(
  2008:2010,
  2011:2013,
  2014:2016,
  2017:2019
)

# Long-format table of interest-weighted ability values for every period.
all <- plotProgressPeriods(data = data, periods = periods)
## Warning in if (interestYears == "All") {: the condition has length > 1 and
## only the first element will be used

## Warning in if (interestYears == "All") {: the condition has length > 1 and
## only the first element will be used

## Warning in if (interestYears == "All") {: the condition has length > 1 and
## only the first element will be used

## Warning in if (interestYears == "All") {: the condition has length > 1 and
## only the first element will be used
# Average weighted value of each ability (variable) within each period.
all.s <- all %>%
  group_by(period, variable) %>%
  summarise(mean = mean(value))

# Interactive dodged bar chart: one bar per period for every ability.
ggplotly(
  ggplot(all.s, aes(x = variable, y = mean, fill = period)) +
    geom_bar(stat = "identity", position = "dodge") +
    xlab("") +
    ylab("Mean Interest") +
    scale_fill_brewer(palette = "Paired") +
    theme_minimal()
)

# Same analysis with every single year treated as its own period.
periods <- 2008:2019

all <- plotProgressPeriods(data = data, periods = periods)
all.s <- all %>%
  group_by(period, variable) %>%
  summarise(mean = mean(value))

ggplotly(
  ggplot(all.s, aes(x = variable, y = mean, fill = period)) +
    geom_bar(stat = "identity", position = "dodge") +
    xlab("") +
    ylab("Mean Interest") +
    scale_fill_brewer(palette = "Paired") +
    theme_minimal()
)

Interest per benchmark.

(Groups from https://paperswithcode.com/)

Computer Vision

# plotIterest.Cat and interest.m are defined outside this excerpt; presumably
# this plots the interest of the benchmarks in the given group - confirm.
plotIterest.Cat(interest.m, "Computer Vision")

Graphs

# plotIterest.Cat and interest.m are defined outside this excerpt; presumably
# this plots the interest of the benchmarks in the given group - confirm.
plotIterest.Cat(interest.m, "Graphs")

Natural Language Processing

# plotIterest.Cat and interest.m are defined outside this excerpt; presumably
# this plots the interest of the benchmarks in the given group - confirm.
plotIterest.Cat(interest.m, "Natural Language Processing")

Playing Games

# plotIterest.Cat and interest.m are defined outside this excerpt; presumably
# this plots the interest of the benchmarks in the given group - confirm.
plotIterest.Cat(interest.m, "Playing Games")

Miscellaneous

# plotIterest.Cat and interest.m are defined outside this excerpt; presumably
# this plots the interest of the benchmarks in the given group - confirm.
plotIterest.Cat(interest.m, "Miscellaneous")

Medical

# plotIterest.Cat and interest.m are defined outside this excerpt; presumably
# this plots the interest of the benchmarks in the given group - confirm.
plotIterest.Cat(interest.m, "Medical")

Methodology

# plotIterest.Cat and interest.m are defined outside this excerpt; presumably
# this plots the interest of the benchmarks in the given group - confirm.
plotIterest.Cat(interest.m, "Methodology")

Speech

# plotIterest.Cat and interest.m are defined outside this excerpt; presumably
# this plots the interest of the benchmarks in the given group - confirm.
plotIterest.Cat(interest.m, "Speech")

Reasoning

# plotIterest.Cat and interest.m are defined outside this excerpt; presumably
# this plots the interest of the benchmarks in the given group - confirm.
plotIterest.Cat(interest.m, "Reasoning")

Time Series

# plotIterest.Cat and interest.m are defined outside this excerpt; presumably
# this plots the interest of the benchmarks in the given group - confirm.
plotIterest.Cat(interest.m, "Time Series")

Computer Code

# plotIterest.Cat and interest.m are defined outside this excerpt; presumably
# this plots the interest of the benchmarks in the given group - confirm.
plotIterest.Cat(interest.m, "Computer Code")